Code
import { aq, op } from "@uwdata/arquero"import { aq, op } from "@uwdata/arquero"We build data sets that contain the marginal distribution of the CURF variables, at various levels of spatial resolution. We begin with CEDs. We use ABS TableBuilder to first subset to adult Australian citizens to provide the marginal distributions of these variables.
We do this for CED, SED and SA1 levels.
From TableBuilder we have counts of adult citizens in CED/SA4 combinations. Many of these will be zero.
ced_sa4 <- ced_read_function("SA4")
sed_sa4 <- sed_read_function("SA4")
sa4 <- bind_rows(ced_sa4,sed_sa4) %>%
pivot_longer(cols = !Division & !state,
names_to = "SA4_name",
values_to = "n") %>%
filter(!grepl(pattern = "^\\.{3}", SA4_name)) %>%
filter(SA4_name != "Total") %>%
filter(n > 0)
load(here("common_data/Census/2016/curf/sa4_codes.RData"))
sa4 <- sa4 %>%
left_join(sa4_codes %>%
mutate(
SA4_CODE21 = str_extract(pattern = "^[0-9]{3}", SA4_class_code),
SA4_name = str_remove(pattern = "^[0-9]{3} ", SA4_class_code)
),
by = "SA4_name")
areaenum <- sa4 %>%
group_by(state,Division,AREAENUM) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
group_by(state,Division) %>%
mutate(p = n/sum(n)*100) %>%
ungroup() %>%
left_join(sa4_codes %>% distinct(AREAENUM,description),
by = "AREAENUM")## RELP_collapsed
relp <- bind_rows(
ced_read_function("RELP_COLLAPSED"),
sed_read_function("RELP")
) %>%
pivot_longer(cols = !Division & !state,
names_to = "RELP",
values_to = "n") %>%
filter(!grepl(pattern = "^\\.{3}",RELP)) %>%
filter(RELP != "Total") %>%
mutate(
RELP_collapsed = case_when(
RELP %in% c("Anglican",
"Baptist",
"Brethren",
"Churches of Christ",
"Jehovah's Witnesses",
"Latter-day Saints",
"Lutheran",
"Other Protestant",
"Pentecostal",
"Uniting Church",
"Salvation Army",
"Seventh-day Adventist") ~ "Protestant",
RELP == "Eastern Orthodox" ~ "Greek Orthodox",
RELP == "Presbyterian and Reformed" ~ "Presbyterian",
RELP %in% c("No Religion (so described)",
"No Religion, (so described)",
"Other Spiritual Beliefs",
"Secular Beliefs",
"Secular Beliefs and Other Spiritual Beliefs and No Religious Affiliation, nfd",
"Secular Beliefs and Other Spiritual Beliefs") ~ "No Religion (so described)",
RELP %in% c("Christianity, nfd", "Other Christian") ~ "Other Christian",
RELP %in% c("Buddhism","Catholic","Hinduism","Islam") ~ RELP,
RELP %in% c("Not stated","Inadequately described") ~ NA,
TRUE ~ "Judaism and Other Religions"
),
RELP_collapsed = factor(RELP_collapsed)
)adjustment_missing <- curf %>%
count(RELP_collapsed) %>%
mutate(RELP_collapsed = if_else(is.na(RELP_collapsed), "Missing", RELP_collapsed)) %>%
filter(RELP_collapsed != "Overseas visitor") %>%
mutate(p = n / sum(n) * 100) %>%
arrange(desc(p)) %>%
left_join(test_data %>%
count(RELP_collapsed) %>%
mutate(p = n / sum(n) * 100) %>%
arrange(desc(p)),
by = "RELP_collapsed") %>%
mutate(w = (p.y - p.x)/p.x[RELP_collapsed == "Missing"],
w = if_else(w < 0, 0, w),
w = w/sum(w,na.rm = TRUE))
adjust <- function(data){
z <- left_join(data,
adjustment_missing %>%
select(RELP_collapsed,w),
by = "RELP_collapsed") %>%
mutate(n_adj = n + w*n[is.na(RELP_collapsed)])
return(z)
}
relp <- relp %>%
group_by(state,Division,RELP_collapsed) %>%
summarise(n = sum(n, na.rm = TRUE)) %>%
ungroup() %>%
group_nest(state,Division) %>%
mutate(d = map(.x = data,
.f = ~adjust(.x))
) %>%
ungroup() %>%
unnest(d) %>%
select(-data,-w) %>%
filter(!is.na(RELP_collapsed)) %>%
rename(n_raw = n,
n = n_adj)relp_wide <- relp %>%
select(state,Division, RELP_collapsed, n) %>%
group_by(state,Division) %>%
mutate(p = n / sum(n) * 100) %>%
ungroup() %>%
select(state,Division, RELP_collapsed, p) %>%
pivot_wider(id_cols = c("state","Division"),
names_from = "RELP_collapsed",
values_from = "p") %>%
rename(Other = 'Judaism and Other Religions',
Orthodox = "Greek Orthodox",
None = "No Religion (so described)")
ojs_define(relp_wide_raw = relp_wide)relp_tab = transpose(relp_wide_raw)
Inputs.table(relp_tab,
{
columns: [ "state", "Division", "None", "Catholic", "Orthodox",
"Protestant", "Presbyterian", "Other Christian",
"Other", "Buddhism", "Hinduism", "Islam", "Other" ],
format: {
Buddhism: x => d3.format(".1f")(x),
Catholic: x => d3.format(".1f")(x),
Orthodox: x => d3.format(".1f")(x),
Hinduism: x => d3.format(".1f")(x),
Islam: x => d3.format(".1f")(x),
Other: x => d3.format(".1f")(x),
None: x => d3.format(".1f")(x),
"Other Christian": x => d3.format(".1f")(x),
"Presbyterian": x => d3.format(".1f")(x),
Protestant: x => d3.format(".1f")(x)
}
}
)Closeness (total variation norm) to population distribution:
ojs_define(
relp_tv = left_join(
relp %>%
select(state,Division, RELP_collapsed, n) %>%
group_by(state,Division) %>%
mutate(p = n / sum(n) * 100) %>%
ungroup() %>%
select(state,Division, RELP_collapsed, p),
test_data %>%
count(RELP_collapsed) %>%
mutate(p = n / sum(n) * 100),
by = "RELP_collapsed") %>%
group_by(state,Division) %>%
summarise(tv = sum(abs(p.x - p.y))) %>%
ungroup() %>%
arrange(tv)
)Inputs.table(transpose(relp_tv))sa1_relp <- sa1_read_function(v="RELP")sexp %>%
filter(SEXP == "Female") %>%
ggplot(.,
aes(x = p)) +
geom_histogram() +
theme_minimal(base_family = "Avenir Next")sexp_tab = transpose(sexp_tab_raw)
Inputs.table(sexp_tab)sa1_sexp <- sa1_read_function("SEXP") %>%
group_by(SA1) %>%
mutate(p = n/sum(n) * 100) %>%
ungroup()Two files are read here. First, HEAP or level of Highest Educational Attainment.
Second, we also extract counts of those having completed high school from Census variable HSCP:
Third, we extract counts of adult citizens with university levels qualifications:
sa1_heap <- sa1_read_function("HEAP")
sa1_hscp <- sa1_read_function("HSCP")ingp <- bind_rows(
ced_read_function("INGP"),
sed_read_function("INGP")
) %>%
pivot_longer(cols = -c(Division,state),
names_to = "INGP",
values_to = "n") %>%
filter(INGP != "Total") %>%
mutate(INGP_binary = INGP %in%
c("Aboriginal",
"Torres Strait Islander",
"Both Aboriginal and Torres Strait Islander")
) %>%
group_by(state,Division,INGP_binary) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
group_by(state,Division) %>%
mutate(p = n/sum(n)*100) %>%
ungroup() ojs_define(ingp_raw = ingp %>%
pivot_wider(id_cols = Division,
names_from = "INGP_binary",
values_from = c("p")
)
)Inputs.table(transpose(ced_ingp_raw))incp <- bind_rows(
ced_read_function("INCP"),
sed_read_function("INCP")
) %>%
pivot_longer(cols = !Division & !state,
names_to = "INCP",
values_to = "n") %>%
filter(INCP != "Total") %>%
mutate(
INCP = str_remove(pattern = "^\\$.*\\(", INCP),
INCP = str_remove(pattern = "\\)$", INCP)
)
incp_continuous <- incp %>%
mutate(
INCP_continuous = case_when(
INCP %in% c("Negative income",
"Nil income",
"$1-$7,799",
"$7,800-$15,599") ~ 1,
## "Less than $15,000 per year"
INCP %in% c("$15,600-$20,799",
"$20,800-$25,999") ~ 2,
## "$15,001 to $25,000 per year"
INCP %in% c("$26,000-$33,799") ~ 3,
## "$25,001 to $35,000 per year"
INCP %in% c("$33,800-$41,599") ~ 4,
## "$35,001 to $45,000 per year"
INCP %in% c("$41,600-$51,999",
"$52,000-$64,999") ~ 5,
## "$45,001 to $60,000 per year"
INCP %in% "$65,000-$77,999" ~ 6,
## "$60,001 to $80,000 per year"
INCP %in% c("$78,000-$90,999",
"$91,000-$103,999") ~ 7,
## "$80,001 to $100,000 per year"
INCP == "$104,000-$155,999" ~ 8,
INCP %in% c("$156,000-$181,999", "$182,000 or more") ~ 9,
TRUE ~ 999
)
) %>%
group_by(state, Division, INCP_continuous) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
group_by(state, Division) %>%
mutate(p = n / sum(n) * 100) %>%
ungroup()
adjustment_missing <- curf %>%
filter(INCP_original != "Overseas visitor") %>%
filter(!(AGEP %in% as.character(0:17))) %>%
mutate(
INCP_continuous = case_when(
INCP_original %in% c("Negative income",
"Nil income",
"$1-$7,799",
"$7,800-$15,599") ~ 1, ## "Less than $15,000 per year"
INCP_original %in% c("$15,600-$20,799",
"$20,800-$25,999") ~ 2, ## "$15,001 to $25,000 per year"
INCP_original %in% c("$26,000-$33,799") ~ 3, ## "$25,001 to $35,000 per year"
INCP_original %in% c("$33,800-$41,599") ~ 4, ## "$35,001 to $45,000 per year"
INCP_original %in% c("$41,600-$51,999",
"$52,000-$64,999") ~ 5, ## "$45,001 to $60,000 per year"
INCP_original %in% "$65,000-$77,999" ~ 6, ## "$60,001 to $80,000 per year"
INCP_original %in% c("$78,000-$90,999",
"$91,000-$103,999") ~ 7, ## "$80,001 to $100,000 per year"
INCP_original == "$104,000-$155,999" ~ 8,
INCP_original %in% c("$156,000>") ~ 9,
TRUE ~ 999)
) %>%
count(INCP_continuous) %>%
mutate(p = n / sum(n) * 100) %>%
arrange(desc(p)) %>%
left_join(test_data %>%
count(INCP_continuous) %>%
mutate(p = n / sum(n) * 100) %>%
arrange(desc(p)),
by = "INCP_continuous") %>%
mutate(w = (p.y - p.x)/p.x[INCP_continuous == 999],
w = if_else(w < 0, 0, w),
w = w/sum(w,na.rm = TRUE))
adjust <- function(data){
z <- left_join(data,
adjustment_missing %>%
select(INCP_continuous,w),
by = "INCP_continuous") %>%
mutate(n_adj = n + w*n[INCP_continuous == 999])
return(z)
}
incp_continuous_adjusted <- incp_continuous %>%
group_nest(state,Division) %>%
mutate(d = map(.x = data,
.f = ~adjust(.x))
) %>%
ungroup() %>%
unnest(d) %>%
select(-data,w) %>%
filter(!is.na(INCP_continuous)) %>%
rename(n_raw = n,
n = n_adj) %>%
group_by(state,Division) %>%
mutate(p = n/sum(n,na.rm = TRUE)*100) %>%
ungroup()ojs_define(incp_raw = incp_continuous %>%
pivot_wider(id_cols = c("state","Division"),
names_from = "INCP_continuous",
values_from = "p")
)Inputs.table(transpose(incp_raw),
{
columns: [ "state", "Division", "1", "2", "3", "4", "5", "6", "7", "8", "9" ],
format: {
"1": x => d3.format(".1f")(x),
"2": x => d3.format(".1f")(x),
"3": x => d3.format(".1f")(x),
"4": x => d3.format(".1f")(x),
"5": x => d3.format(".1f")(x),
"6": x => d3.format(".1f")(x),
"7": x => d3.format(".1f")(x),
"8": x => d3.format(".1f")(x),
"9": x => d3.format(".1f")(x)
}
}
)Closeness (total variation norm) to population distribution:
Inputs.table(transpose(incp_tv))sa1_incp <- sa1_read_function("INCP") %>%
mutate(
j = str_remove(pattern = "^\\$.*\\(", j),
j = str_remove(pattern = "\\)$", j)
)
sa1_incp_continuous <- sa1_incp %>%
mutate(
k = case_when(
j %in% c("Negative income",
"Nil income",
"$1-$7,799",
"$7,800-$15,599") ~ 1,
## "Less than $15,000 per year"
j %in% c("$15,600-$20,799",
"$20,800-$25,999") ~ 2,
## "$15,001 to $25,000 per year"
j %in% c("$26,000-$33,799") ~ 3,
## "$25,001 to $35,000 per year"
j %in% c("$33,800-$41,599") ~ 4,
## "$35,001 to $45,000 per year"
j %in% c("$41,600-$51,999",
"$52,000-$64,999") ~ 5,
## "$45,001 to $60,000 per year"
j %in% "$65,000-$77,999" ~ 6,
## "$60,001 to $80,000 per year"
j %in% c("$78,000-$90,999",
"$91,000-$103,999") ~ 7,
## "$80,001 to $100,000 per year"
j == "$104,000-$155,999" ~ 8,
j %in% c("$156,000-$181,999", "$182,000 or more") ~ 9,
TRUE ~ 999
)
) %>%
group_by(SA1,k) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
rename(j = k) %>%
group_by(SA1) %>%
mutate(p = n / sum(n) * 100) %>%
ungroup()
sa1_incp_continuous_adjusted <- sa1_incp_continuous %>%
rename(INCP_continuous = j) %>%
group_nest(SA1) %>%
mutate(d = map(.x = data,
.f = ~adjust(.x))
) %>%
ungroup() %>%
unnest(d) %>%
select(-data,w) %>%
filter(!is.na(INCP_continuous)) %>%
rename(n_raw = n,
n = n_adj) %>%
group_by(SA1) %>%
mutate(p = n/sum(n,na.rm = TRUE)*100) %>%
ungroup()From the persons, place of enumeration database in TableBuilder, we extract HIND (total household income) for CEDs and SEDs.
hind <- bind_rows(
ced_read_function("HIND"),
sed_read_function("HIND")
) %>%
pivot_longer(cols = !Division & !state,
names_to = "HIND",
values_to = "n") %>%
filter(HIND != "Total") %>%
mutate(
HIND = str_remove(pattern = "^\\$.*\\(", HIND),
HIND = str_remove(pattern = "\\)$", HIND)
)
levs <- hind %>% distinct(HIND) %>% pull(HIND)
hind <- hind %>% mutate(HIND = factor(HIND,levels=levs,ordered = TRUE))sa1_hind <- sa1_read_function("HIND") %>%
mutate(
j = str_remove(pattern = "^\\$.*\\(", j),
j = str_remove(pattern = "\\)$", j)
)Australia, UK/NZ, all others.
born_country_three <-
bind_rows(
ced_read_function("born_country_three"),
sed_read_function("born_country_three"),
) %>%
rename(Australia = `Australia (includes External Territories)`) %>%
mutate(`UK/NZ` = `New Zealand` + `United Kingdom, Channel Islands and Isle of Man`) %>%
select(state,Division,Australia,`UK/NZ`) %>%
left_join(
sexp %>%
group_by(state,Division) %>%
summarise(Total = sum(n)) %>%
ungroup(),
by = c("state","Division")
) %>%
mutate(Other = Total - Australia - `UK/NZ`) %>%
select(-Total) %>%
pivot_longer(cols = !Division & !state,
names_to = "born_country_three",
values_to = "n") %>%
group_by(state,Division) %>%
mutate(p=n/sum(n)*100) %>%
ungroup()ojs_define(born_country_three_raw = born_country_three %>%
pivot_wider(id_cols = Division,
names_from = born_country_three,
values_from = "p")
)Inputs.table(transpose(born_country_three))Closeness (total variation norm) to population distribution:
Inputs.table(transpose(bcp_tv))Two TableBuilder files are used here, one with the 5 year buckets, the other to get the counts in discrete ages 18 through 24.
age_five <- bind_rows(
ced_read_function("AGE5P"),
sed_read_function("AGE5P")
) %>%
pivot_longer(cols = !Division & !state,
names_to = "AGE5P",
values_to = "n") %>%
filter(n > 0) %>%
filter(AGE5P != "Total") %>%
group_by(state,Division) %>%
mutate(p = n/sum(n)*100) %>%
ungroup()
age_young <- bind_rows(
ced_read_function("AGE_18_24",10),
sed_read_function("AGE_18_24",10)
) %>%
pivot_longer(cols = !Division & !state,
names_to = "AGE_18_24",
values_to = "n") %>%
filter(n > 0) %>%
filter(AGE_18_24 != "Total") %>%
group_by(state,Division) %>%
mutate(p = n/sum(n)*100) %>%
ungroup()
agep <- bind_rows(
age_young %>%
rename(age = AGE_18_24),
age_five %>%
rename(age = AGE5P) %>%
filter(age != "15-19 years") %>%
filter(age != "20-24 years")
) %>%
mutate(
AGEP = str_remove(pattern = " years.*$",age),
AGEP = if_else(AGEP %in% c("85-89","90-94","95-99","100"),
"85>",
AGEP),
AGEP = factor(
AGEP,
levels = c(
as.character(
0:24),
"25-29",
"30-34",
"35-39",
"40-44",
"45-49",
"50-54",
"55-59",
"60-64",
"65-69",
"70-74",
"75-79",
"80-84",
"85>"),
ordered = TRUE)
) %>%
group_by(state,Division,AGEP) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
select(state,Division,AGEP,n)
agep_continuous <- agep %>%
mutate(
AGE_continuous = case_when(
AGEP %in% as.character(18:24) ~ as.numeric(AGEP),
AGEP == "25-29" ~ 27,
AGEP == "30-34" ~ 32,
AGEP == "35-39" ~ 37,
AGEP == "40-44" ~ 42,
AGEP == "45-49" ~ 47,
AGEP == "50-54" ~ 52,
AGEP == "55-59" ~ 57,
AGEP == "60-64" ~ 62,
AGEP == "65-69" ~ 67,
AGEP == "70-74" ~ 72,
AGEP == "75-79" ~ 77,
AGEP == "80-84" ~ 82,
TRUE ~ 87
)
) %>%
group_by(state,Division,AGE_continuous) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
select(state,Division,AGE_continuous,n)sa1_age5p <- sa1_read_function("AGE5P")Read hierarchy files from ABS:
ancestry_hierarchy <- readxl::read_xls(
path = here(
"common_data/Census/2021/meta/ancestry_hierarchy.xls"
),
sheet = 4,
skip = 7,
trim_ws = TRUE,
col_names = c("row_name","a1","a2","a3")
) %>%
mutate(a1 = if_else(!grepl("[A_z]",a1),NA,a1)) %>%
tidyr::fill(a1) %>%
group_by(a1) %>%
tidyr::fill(a2, .direction = "up") %>%
mutate(a2 = if_else(grepl("^[0-9]",a2),NA,a2)) %>%
tidyr::fill(a2) %>%
tidyr::fill(a3, .direction = "up") %>%
ungroup() %>%
select(-row_name) %>%
mutate(a1 = str_to_title(a1)) %>%
mutate(a1 = gsub(pattern = "And",
replacement = "and",
a1),
a1 = gsub(pattern = "Of",
replacement = "of",
a1),
a1 = gsub(pattern = "The",
replacement = "the",
a1),
a3 = gsub(pattern = "\\, nec$",
replacement = "",
a3),
a3 = gsub(pattern = "\\, nfd$",
replacement = "",
a3)
) %>%
filter(a1 != "Total") %>%
filter(a2 != "Total") %>%
filter(a3 != "Total") %>%
distinct()ancp_raw <- bind_rows(
ced_read_function("ANC1P"),
sed_read_function("ANC1P")
) %>%
mutate(`Oceanian, nfd` = as.double(`Oceanian, nfd`)) %>%
pivot_longer(cols = where(is.double),
names_to = "ancp",
values_to = "n",
values_drop_na = TRUE)
ancp_full <- ancp_raw %>%
mutate(ancp = gsub(pattern = "\\, nfd$",
replacement = "",
ancp)) %>%
mutate(ancp = gsub(pattern = "\\, nec$",
replacement = "",
ancp)) %>%
mutate(ancp = gsub(pattern = "\\, so described$",
replacement = "",
ancp)) %>%
filter(ancp != "Total") %>%
group_by(state,Division,ancp) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
left_join(ancestry_hierarchy,
by = c("ancp" = "a3")
) %>%
mutate(
a1 = if_else(ancp == "Creole",
"Peoples of the Americas",
a1),
a1 = if_else(ancp == "African",
"Sub-Saharan African",
a1),
a1 = if_else(ancp == "Asian",
"North-East Asian",
a1),
a1 = if_else(ancp == "Caucasian" | ancp == "European" | ancp == "Eurasian",
"North-West European",
a1),
a2 = if_else(
is.na(a2),
ancestry_hierarchy$a2[match(ancp,ancestry_hierarchy$a2)],
a2),
a1 = if_else(
is.na(a1),
ancestry_hierarchy$a1[match(ancp,ancestry_hierarchy$a2)],
a1),
## try matching at level 1
a2 = if_else(
is.na(a2),
ancestry_hierarchy$a2[match(ancp,ancestry_hierarchy$a1)],
a2),
a1 = if_else(
is.na(a1),
ancestry_hierarchy$a1[match(ancp,ancestry_hierarchy$a1)],
a1),
a1 = if_else(ancp %in% c("Inadequately described",
"Not stated"),
ancp,
a1),
a2 = if_else(ancp %in% c("Inadequately described",
"Not stated"),
ancp,
a2)
)
ancp_level1 <- ancp_full %>%
group_by(state,Division,a1) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
group_by(state,Division) %>%
mutate(p = n/sum(n)) %>%
ungroup()sa1_ancp_raw <- sa1_read_function("ANC1P")
sa1_ancp_full <- sa1_ancp_raw %>%
rename(ancp = j) %>%
mutate(ancp = gsub(pattern = "\\, nfd$",
replacement = "",
ancp)) %>%
mutate(ancp = gsub(pattern = "\\, nec$",
replacement = "",
ancp)) %>%
mutate(ancp = gsub(pattern = "\\, so described$",
replacement = "",
ancp)) %>%
filter(ancp != "Total") %>%
group_by(SA1,ancp) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
left_join(ancestry_hierarchy,
by = c("ancp" = "a3")
) %>%
mutate(
a1 = if_else(ancp == "Creole",
"Peoples of the Americas",
a1),
a1 = if_else(ancp == "African",
"Sub-Saharan African",
a1),
a1 = if_else(ancp == "Asian",
"North-East Asian",
a1),
a1 = if_else(ancp == "Caucasian" | ancp == "European" | ancp == "Eurasian",
"North-West European",
a1),
a2 = if_else(
is.na(a2),
ancestry_hierarchy$a2[match(ancp,ancestry_hierarchy$a2)],
a2),
a1 = if_else(
is.na(a1),
ancestry_hierarchy$a1[match(ancp,ancestry_hierarchy$a2)],
a1),
## try matching at level 1
a2 = if_else(
is.na(a2),
ancestry_hierarchy$a2[match(ancp,ancestry_hierarchy$a1)],
a2),
a1 = if_else(
is.na(a1),
ancestry_hierarchy$a1[match(ancp,ancestry_hierarchy$a1)],
a1),
a1 = if_else(ancp %in% c("Inadequately described",
"Not stated"),
ancp,
a1),
a2 = if_else(ancp %in% c("Inadequately described",
"Not stated"),
ancp,
a2)
)
sa1_ancp_level1 <- sa1_ancp_full %>%
group_by(SA1,a1) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
group_by(SA1) %>%
mutate(p = n/sum(n)) %>%
ungroup()ancp_collapse <- bind_rows(
ced_read_function("ANC1P"),
sed_read_function("ANC1P")
) %>%
mutate(`Oceanian, nfd` = as.double(`Oceanian, nfd`)) %>%
pivot_longer(cols = where(is.double),
names_to = "ancp",
values_to = "n",
values_drop_na = TRUE) %>%
mutate(
ANCP_collapse = case_when(
grepl("^Australian",ancp) ~ "Australian",
grepl("^Chinese",ancp) ~ "Chinese",
ancp == "North-East Asian, nfd" ~ "Chinese",
grepl("^English",ancp) |
grepl("^British",ancp) |
ancp=="Welsh" ~ "English",
ancp == "German" ~ "German",
ancp == "Irish" ~ "Irish",
ancp == "Italian" ~ "Italian",
ancp == "Jewish" ~ "Jewish",
ancp == "Greek" ~ "Greek",
ancp == "Scottish" ~ "Scottish",
TRUE ~ "Other"
)
) %>%
group_by(state,Division,ANCP_collapse) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
group_by(state,Division) %>%
mutate(p = n/sum(n)) %>%
ungroup()sa1_ancp <- sa1_read_function("ANC1P") %>%
rename(ancp = j) %>%
mutate(
ANCP_collapse = case_when(
grepl("^Australian",ancp) ~ "Australian",
grepl("^Chinese",ancp) ~ "Chinese",
ancp == "North-East Asian, nfd" ~ "Chinese",
grepl("^English",ancp) |
grepl("^British",ancp) |
ancp=="Welsh" ~ "English",
ancp == "German" ~ "German",
ancp == "Irish" ~ "Irish",
ancp == "Italian" ~ "Italian",
ancp == "Jewish" ~ "Jewish",
ancp == "Greek" ~ "Greek",
ancp == "Scottish" ~ "Scottish",
TRUE ~ "Other"
)
) %>%
group_by(SA1,ANCP_collapse) %>%
summarise(n = sum(n)) %>%
ungroup() %>%
group_by(SA1) %>%
mutate(p = n/sum(n)) %>%
ungroup() %>%
rename(j = ANCP_collapse)lanp <- bind_rows(
ced_read_function("LANP_3"),
sed_read_function("LANP_3")
) %>%
pivot_longer(cols = !Division & !state,
names_to = "LANP",
values_to = "n") %>%
filter(LANP != "Total")sa1_lanp <- sa1_read_function("LANP")marginals <- bind_rows(
relp %>%
mutate(var = "RELP_collapsed") %>%
rename(j = RELP_collapsed),
sexp %>%
mutate(var = "SEXP") %>%
rename(j = SEXP),
heap %>% mutate(var = "HEAP"),
hs %>% mutate(var = "high_school"),
uni %>% mutate(var = "uni") %>%
rename(j = uni),
ingp %>% mutate(var = "INGP_binary") %>%
rename(j = INGP_binary) %>%
mutate(j = as.character(j)),
incp %>% mutate(var = "INCP") %>%
rename(j = INCP) %>%
mutate(j = as.character(j)),
incp_continuous_adjusted %>% mutate(var = "INCP_continuous") %>%
rename(j = INCP_continuous) %>%
mutate(j = as.character(j)),
hind %>% mutate(var = "HIND") %>%
rename(j = HIND),
igap %>%
mutate(var = "IGAP") %>%
rename(j = IGAP),
occp %>%
mutate(var = "OCCP") %>%
rename(j = OCCP),
born_country_three %>% mutate(var = "born_country_three") %>%
rename(j = born_country_three),
areaenum %>% mutate(var = "AREAENUM") %>%
rename(j = description) %>%
select(-AREAENUM),
age_five %>% mutate(var = "AGE5P") %>%
rename(j = AGE5P),
lanp %>% mutate(var = "LANP") %>%
rename(j = LANP),
agep %>% mutate(var = "AGEP") %>%
rename(j = AGEP),
agep_continuous %>%
mutate(var = "AGE_continuous") %>%
rename(j = "AGE_continuous") %>%
mutate(j = as.character(j)),
ancp_level1 %>%
mutate(var = "ANCP_1") %>%
rename(j = a1) %>%
mutate(j = as.character(j)),
ancp_collapse %>%
mutate(var = "ANCP_collapse") %>%
rename(j = ANCP_collapse) %>%
mutate(j = as.character(j)),
) %>%
group_by(var, Division) %>%
mutate(p = n / sum(n)) %>%
ungroup() %>%
select(-n_raw)
save(marginals,
file = here("common_data/Census/2021/CED/marginals.RData"))sa1_marginals <- bind_rows(
sa1_relp %>%
mutate(var = "RELP_collapsed"),
sa1_sexp %>%
mutate(var = "SEXP"),
sa1_heap %>%
mutate(var = "HEAP"),
sa1_hscp %>%
mutate(var = "high_school"),
#ingp %>% mutate(var = "INGP_binary") %>%
# rename(j = INGP_binary) %>%
# mutate(j = as.character(j)),
sa1_incp %>%
mutate(var = "INCP") %>%
mutate(j = as.character(j)),
sa1_incp_continuous %>%
mutate(var = "INCP_continuous") %>%
mutate(j = as.character(j)),
sa1_hind %>%
mutate(var = "HIND"),
##born_country_three %>% mutate(var = "born_country_three") %>%
## rename(j = born_country_three),
##areaenum %>% mutate(var = "AREAENUM") %>%
## rename(j = description) %>%
## select(-AREAENUM),
sa1_age5p %>%
mutate(var = "AGE5P"),
sa1_lanp %>%
mutate(var = "LANP"),
#ced_agep_continuous %>% mutate(var = "AGE_continuous") %>%
# rename(j = "AGE_continuous") %>% mutate(j = as.character(j)),
#ced_ancp_collapse %>% mutate(var = "ANCP_collapse") %>%
# rename(j = ANCP_collapse) %>% mutate(j = as.character(j)),
sa1_ancp_level1 %>%
mutate(var = "ANCP_1") %>%
rename(j = a1),
sa1_ancp %>%
mutate(var = "ANCP_collapse"),
sa1_sexp %>%
mutate(var = "SEXP")
) %>%
group_by(var,SA1) %>%
mutate(p = n/sum(n)) %>%
ungroup()
write_fst(sa1_marginals,
path = here(
"common_data/Census/2021/SA1/marginals.fst")
)